import numpy as np
from subspace_clustering import K_Subspaces_algo
from sklearn import datasets
from sklearn.cluster import KMeans
import cProfile

def test_inital_solution():
    """Smoke test: computing initial subspaces on a small 2-D point set.

    There are no assertions; the test only fails if
    ``get_initial_subspaces`` raises.
    """
    samples = np.array([
        [0, 1],
        [100, 2],
        [1, 1],
        [2, 1],
        [3, 3],
        [1, 5],
        [2, 1],
    ])
    model = K_Subspaces_algo(2, dim=1)
    model.get_initial_subspaces(samples)

# def test_assign_points():
#     algo = K_Subspaces_algo(2, dim=1)
#     points = np.array([[0,1], [100,2], [1,1], [2,1], [3,3], [1,5],[2,1]])
#     algo.centers = algo.get_initial_subspaces(points)
#     algo.assign_points(points)
    
def test_fit():
    """Fit two 1-dimensional subspaces to four 2-D points; expect ~zero score.

    Three of the points, (1, 1), (2, 2) and (3, 3), lie on the line y = x,
    and the remaining point is (100, 2). The test requires the score
    reported after fitting to be numerically close to 0.
    """
    samples = np.array([[1, 1], [100, 2], [2, 2], [3, 3]])
    model = K_Subspaces_algo(2, dim=1)
    model.fit(samples)
    residual = model.score(samples)
    assert np.isclose(residual, 0)

def test_fit_dim2():
    """Compare dim=2 and dim=1 fits on the same four 3-D points.

    With two subspaces of ``dim=2`` the score must be close to 0; with two
    subspaces of ``dim=1`` on the same data the score must exceed 1.
    """
    samples = np.array([
        [1, 1, 1],
        [-5, 0, 5],
        [2, 0, 2],
        [3, 3, 3],
    ])

    # Two subspaces with dim=2: expected to give a score close to zero.
    model_2d = K_Subspaces_algo(2, dim=2)
    model_2d.fit(samples)
    assert np.isclose(model_2d.score(samples), 0)

    # Two subspaces with dim=1: expected to leave a score above 1.
    model_1d = K_Subspaces_algo(2, dim=1)
    model_1d.fit(samples)
    assert model_1d.score(samples) > 1
    
def test_subspace_clustering():
    """Compare K-subspaces scores against KMeans on a covertype sample.

    A fixed-seed sample of rows is drawn from the covertype dataset
    (``sklearn.datasets.fetch_covtype``, which needs the dataset to be
    downloadable or already cached). On that sample the test requires:

    * the score of two ``dim=2`` subspaces to be below the KMeans inertia
      for two clusters, and
    * the score of two ``dim=2`` subspaces to be below the score of two
      ``dim=1`` subspaces.

    The row sample and the KMeans baseline are seeded so repeated runs
    compare the same data against the same baseline. Any randomness inside
    ``K_Subspaces_algo`` itself is not controlled here.
    """
    seed = 0
    sample_size = 10000

    points = datasets.fetch_covtype().data

    # Use a local generator so the global numpy random state is not touched.
    rng = np.random.default_rng(seed)
    row_ids = rng.choice(points.shape[0], sample_size, replace=False)
    data_subset = points[row_ids]

    algo_2dim = K_Subspaces_algo(2, dim=2)
    algo_2dim.fit(data_subset)
    score = algo_2dim.score(data_subset)

    algo_1dim = K_Subspaces_algo(2, dim=1)
    algo_1dim.fit(data_subset)
    score_1d = algo_1dim.score(data_subset)

    kmeans_score = KMeans(2, random_state=seed).fit(data_subset).inertia_

    print(f"Kmeans score: {kmeans_score:,}")
    print(f"Subspace score: {score:,}")
    assert score < kmeans_score
    assert score < score_1d

def test_find_best_subspace():
    """Check that ``find_best_subspace`` yields a zero-cost dim=2 subspace.

    Two independent point sets are used. For each one, a subspace is fitted
    to the set and ``calculate_cost`` of that same set against its fitted
    subspace must be numerically close to 0:

    * ``points``: three 3-D points that all satisfy x == z;
    * ``points2``: the single point (-5, 0, 5).
    """
    algo = K_Subspaces_algo(1, dim=2)

    points = np.array([[1, 1, 1], [2, 0, 2], [3, 3, 3]])
    subspace = algo.find_best_subspace(points)
    cost1 = algo.calculate_cost(points, subspace)
    assert np.isclose(cost1, 0)

    points2 = np.array([[-5, 0, 5]])
    # Fit on points2 itself. Fitting on `points` here (as before) evaluates
    # points2 against a subspace built from unrelated data.
    subspace_2 = algo.find_best_subspace(points2)
    cost2 = algo.calculate_cost(points2, subspace_2)
    assert np.isclose(cost2, 0)
# # test_subspace_clustering()
